Data Extraction with Python, extract from the datasets the proportion of the vote won by the Democratic candidate.
library(reticulate)
import pandas as pd
import numpy as np
pres = pd.read_csv('data/countypres_2000-2016.csv')
unemp = pd.read_excel('data/Unemployment.xls', header = 7)
pres['proportion'] = pres['candidatevotes']/pres['totalvotes']
pres = pres[(pres.year == 2008)|(pres.year == 2012)|(pres.year == 2016)]
pres = pres[pres.party == 'democrat']
unemp = unemp[['area_name','Unemployment_rate_2008','Unemployment_rate_2012','Unemployment_rate_2016']]
pres.head()
## year state state_po ... totalvotes version proportion
## 21815 2008 Alabama AL ... 23641 20191203 0.257730
## 21818 2008 Alabama AL ... 81413 20191203 0.238119
## 21821 2008 Alabama AL ... 11630 20191203 0.489854
## 21824 2008 Alabama AL ... 8644 20191203 0.265965
## 21827 2008 Alabama AL ... 24267 20191203 0.145135
##
## [5 rows x 12 columns]
unemp.head()
## area_name ... Unemployment_rate_2016
## 0 United States ... 4.869007
## 1 Alabama ... 5.800000
## 2 Autauga County, AL ... 5.100000
## 3 Baldwin County, AL ... 5.300000
## 4 Barbour County, AL ... 8.300000
##
## [5 rows x 4 columns]
Create 6 Choropleths with R
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.1, PROJ 6.3.1
library(readxl)
library(spData)
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sp)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(ggpubr)
# read the two tables
pres <- read_csv('data/countypres_2000-2016.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## year = col_double(),
## state = col_character(),
## state_po = col_character(),
## county = col_character(),
## FIPS = col_double(),
## office = col_character(),
## candidate = col_character(),
## party = col_character(),
## candidatevotes = col_double(),
## totalvotes = col_double(),
## version = col_double()
## )
pres$FIPS <- as.character(pres$FIPS)
unemp <- read_excel("data/Unemployment.xls", range = cell_limits(c(8, 1), c(NA, NA)))
unemp <- unemp %>%
select(FIPStxt, Unemployment_rate_2008, Unemployment_rate_2012, Unemployment_rate_2016)
# county
county <- st_read('data/tl_2019_us_county-1/tl_2019_us_county.shp')
## Reading layer `tl_2019_us_county' from data source `/Users/Mary/Desktop/ANLY 503/anly503-fall2020-a6-mengyu14/data/tl_2019_us_county-1/tl_2019_us_county.shp' using driver `ESRI Shapefile'
## Simple feature collection with 3233 features and 17 fields
## geometry type: MULTIPOLYGON
## dimension: XY
## bbox: xmin: -179.2311 ymin: -14.60181 xmax: 179.8597 ymax: 71.43979
## geographic CRS: NAD83
county <- county %>%
filter(STATEFP == '06'|STATEFP == 42|STATEFP == 48)
TX
txp <- pres %>%
filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'Texas')
txp['proportion'] = txp['candidatevotes']/txp['totalvotes']
txp <- txp %>%
left_join(county, by = c('FIPS'='GEOID'))
T11 <- txp %>%
filter(year == 2008)
T12 <- txp %>%
filter(year == 2012)
T13 <- txp %>%
filter(year == 2016)
T11 <- ggplot(T11, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of TX in 2008")
T12 <- ggplot(T12, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of TX in 2012")
T13 <- ggplot(T13,aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of TX in 2016")
txun <- unemp %>%
filter(substr(FIPStxt, 1, 2) == '48') %>%
left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
tail(-1)
T21 <- ggplot(txun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2008)) +
scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of TX in 2008")
T22 <- ggplot(txun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2012)) +
scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of TX in 2012")
T23 <- ggplot(txun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2016)) +
scale_fill_distiller(name="Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of TX in 2016")
ggarrange(T11, T21, T12, T22, T13, T23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))

CA
cap <- pres %>%
filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'California')
cap['proportion'] = cap['candidatevotes']/cap['totalvotes']
cap$FIPS <- paste0('0', cap$FIPS)
cap <- cap %>%
left_join(county, by = c('FIPS' = 'GEOID'))
C11 <- cap %>%
filter(year == 2008)
C12 <- cap %>%
filter(year == 2012)
C13 <- cap %>%
filter(year == 2016)
C11 <- ggplot(C11, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of CA in 2008")
C12 <- ggplot(C12, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of CA in 2012")
C13 <- ggplot(C13, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of CA in 2016")
caun <- unemp %>%
filter(substr(FIPStxt, 1, 2) == '06') %>%
left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
tail(-1)
C21 <- ggplot(caun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2008)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of CA in 2008")
C22 <- ggplot(caun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2012)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of CA in 2012")
C23 <- ggplot(caun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2016)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of CA in 2016")
ggarrange(C11, C21, C12, C22, C13, C23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))

PA
pap <- pres %>%
filter((year == 2008|year == 2012|year == 2016) & party == 'democrat' & state == 'Pennsylvania')
pap['proportion'] = pap['candidatevotes']/pap['totalvotes']
pap <- pap %>%
left_join(county, by = c('FIPS' = 'GEOID'))
P11 <- pap %>%
filter(year == 2008)
P12 <- pap %>%
filter(year == 2012)
P13 <- pap %>%
filter(year == 2016)
P11 <- ggplot(P11, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of PA in 2008")
P12 <- ggplot(P12, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of PA in 2012")
P13 <- ggplot(P13, aes(geometry = geometry)) +
geom_sf(aes(fill = proportion)) +
scale_fill_distiller(name = "Proportion of Votes", palette = "Blues", direction = 1) +
ggtitle("Proportion of Votes for Democrat Candidate of PA in 2016")
paun <- unemp %>%
filter(substr(FIPStxt, 1, 2) == '42') %>%
left_join(county, by = c('FIPStxt' = 'GEOID')) %>%
tail(-1)
P21 <- ggplot(paun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2008)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of PA in 2008")
P22 <- ggplot(paun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2012)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of PA in 2012")
P23 <- ggplot(paun, aes(geometry = geometry)) +
geom_sf(aes(fill = Unemployment_rate_2016)) +
scale_fill_distiller(name = "Unemployment rate", palette = "Reds", direction = 1) +
ggtitle("Unemployment rate of PA in 2016")
ggarrange(P11, P21, P12, P22, P13, P23, ncol = 2, nrow=3, align = 'hv', widths = c(6, 6, 6, 6, 6, 6))
